Change the community numbers

Starting from this experiment, I changed the community numbers from [1, 39] to [0, 38]. This change is due to the way MultiLabelBinarizer works.


In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

In [528]:
# Read the pickled weights (file name suggests DeepWalk on BlogCatalog3).
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
weights_path = '../src/mane/prototype/embeddings/BC3_deepwalk.weights'
with open(weights_path, 'rb') as weights_file:
    w = p.load(weights_file)

# Build the graph object from the graph file and its community (label) file
# using the project's `graph` module.
graph_path = '../src/mane/data/blogcatalog3.graph'
community_path = '../src/mane/data/blogcatalog3.community'
bc = g.graph_from_pickle(graph_path, community_path)

In [529]:
# Average the first two entries of the loaded weights
# (presumably the two embedding matrices of the model -- TODO confirm),
# then rescale with scikit-learn's `normalize` using its default settings.
emb_mean = (w[0] + w[1]) / 2
emb = normalize(emb_mean)

In [530]:
# Ask the graph for node ids and label lists, split with ratio 0.5,
# then unpack the four parts: train ids, train labels, test ids, test labels.
# NOTE(review): if the split is random, results vary between runs -- confirm/seed.
split = bc.get_ids_labels(0.5)
x_train, yl_train, x_test, yl_test = split

In [531]:
# One binary logistic-regression model (C=2) per community, wrapped in a
# one-vs-rest classifier; n_jobs=-1 lets it use all available CPU cores.
base_clf = LogisticRegression(C=2)
lg = OneVsRestClassifier(estimator=base_clf, n_jobs=-1)

In [532]:
# Feature rows: the embedding of every training node, looked up by node id.
X_train = [emb[node_id] for node_id in x_train]

# Targets: 0/1 indicator matrix with one fixed column per community 0..38,
# so the column layout does not depend on which labels occur in this split.
train_binarizer = MultiLabelBinarizer(classes=range(39))
Y_train = train_binarizer.fit_transform(yl_train)

In [533]:
# Train the one-vs-rest classifier on the training embeddings; the fitted
# estimator is the cell's displayed value.
lg.fit(X=X_train, y=Y_train)


Out[533]:
OneVsRestClassifier(estimator=LogisticRegression(C=2, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
          n_jobs=-1)

In [534]:
# Feature rows: the embedding of every test node, looked up by node id.
X_test = []
for node_id in x_test:
    X_test.append(emb[node_id])

# Ground truth as a 0/1 indicator matrix with one fixed column per community 0..38.
test_binarizer = MultiLabelBinarizer(classes=range(39))
Y_test = test_binarizer.fit_transform(yl_test)

In [535]:
# Per-community probabilities for every test node (one row per node).
pred = lg.predict_proba(X_test)

# For each test node keep the k most probable communities, where k is the
# number of true labels of that node (the true label count is used here to
# decide how many labels to predict).
num_pred = []
for i, probs in enumerate(pred):
    k = len(yl_test[i])
    ranked = probs.argsort()  # ascending: most probable communities are last
    # Slice from an explicit start index. `ranked[-k:]` is wrong for k == 0:
    # `-0` is `0`, so it would return *every* community instead of none.
    num_pred.append(ranked[len(ranked) - k:])

In [536]:
# Encode the predicted label indices as a 0/1 indicator matrix with one fixed
# column per community 0..38, matching the layout used for the true labels.
pred_binarizer = MultiLabelBinarizer(classes=range(39))
Y_pred = pred_binarizer.fit_transform(num_pred)

In [544]:
# Support-weighted F1 over all communities. Communities that are never
# predicted trigger an UndefinedMetricWarning and count as 0.0.
f1_score(
    y_true=Y_test,
    y_pred=Y_pred,
    average='weighted',
)


/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Out[544]:
0.14176635631724019

In [538]:
# Sanity check: number of node ids in the training split.
len(x_train)


Out[538]:
5156

In [522]:
# Spot check: node id stored at position 4 of the training split.
x_train[4]


Out[522]:
1971

In [523]:
# Spot check: label list of the training sample at position 4.
yl_train[4]


Out[523]:
[3]

In [524]:
# Spot check: node id stored at position 5 of the test split.
x_test[5]


Out[524]:
2870

In [525]:
# Spot check: true label list of the test sample at position 5.
yl_test[5]


Out[525]:
[6, 21]

In [527]:
# Spot check: predicted label indices for the test sample at position 5
# (to be compared by eye with yl_test[5]).
num_pred[5]


Out[527]:
array([7, 6])

In [ ]: